{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Pregnancies</th>\n",
       "      <th>Glucose</th>\n",
       "      <th>BloodPressure</th>\n",
       "      <th>SkinThickness</th>\n",
       "      <th>Insulin</th>\n",
       "      <th>BMI</th>\n",
       "      <th>DiabetesPedigreeFunction</th>\n",
       "      <th>Age</th>\n",
       "      <th>Outcome</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>6</td>\n",
       "      <td>148</td>\n",
       "      <td>72</td>\n",
       "      <td>35</td>\n",
       "      <td>0</td>\n",
       "      <td>33.6</td>\n",
       "      <td>0.627</td>\n",
       "      <td>50</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>85</td>\n",
       "      <td>66</td>\n",
       "      <td>29</td>\n",
       "      <td>0</td>\n",
       "      <td>26.6</td>\n",
       "      <td>0.351</td>\n",
       "      <td>31</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>8</td>\n",
       "      <td>183</td>\n",
       "      <td>64</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>23.3</td>\n",
       "      <td>0.672</td>\n",
       "      <td>32</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>89</td>\n",
       "      <td>66</td>\n",
       "      <td>23</td>\n",
       "      <td>94</td>\n",
       "      <td>28.1</td>\n",
       "      <td>0.167</td>\n",
       "      <td>21</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0</td>\n",
       "      <td>137</td>\n",
       "      <td>40</td>\n",
       "      <td>35</td>\n",
       "      <td>168</td>\n",
       "      <td>43.1</td>\n",
       "      <td>2.288</td>\n",
       "      <td>33</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Pregnancies  Glucose  BloodPressure  SkinThickness  Insulin   BMI  \\\n",
       "0            6      148             72             35        0  33.6   \n",
       "1            1       85             66             29        0  26.6   \n",
       "2            8      183             64              0        0  23.3   \n",
       "3            1       89             66             23       94  28.1   \n",
       "4            0      137             40             35      168  43.1   \n",
       "\n",
       "   DiabetesPedigreeFunction  Age  Outcome  \n",
       "0                     0.627   50        1  \n",
       "1                     0.351   31        0  \n",
       "2                     0.672   32        1  \n",
       "3                     0.167   21        0  \n",
       "4                     2.288   33        1  "
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train = pd.read_csv(\"pima-indians-diabetes .csv\")\n",
    "train.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Pregnancies</th>\n",
       "      <th>Glucose</th>\n",
       "      <th>BloodPressure</th>\n",
       "      <th>SkinThickness</th>\n",
       "      <th>Insulin</th>\n",
       "      <th>BMI</th>\n",
       "      <th>DiabetesPedigreeFunction</th>\n",
       "      <th>Age</th>\n",
       "      <th>Outcome</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>768.000000</td>\n",
       "      <td>768.000000</td>\n",
       "      <td>768.000000</td>\n",
       "      <td>768.000000</td>\n",
       "      <td>768.000000</td>\n",
       "      <td>768.000000</td>\n",
       "      <td>768.000000</td>\n",
       "      <td>768.000000</td>\n",
       "      <td>768.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>3.845052</td>\n",
       "      <td>120.894531</td>\n",
       "      <td>69.105469</td>\n",
       "      <td>20.536458</td>\n",
       "      <td>79.799479</td>\n",
       "      <td>31.992578</td>\n",
       "      <td>0.471876</td>\n",
       "      <td>33.240885</td>\n",
       "      <td>0.348958</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>3.369578</td>\n",
       "      <td>31.972618</td>\n",
       "      <td>19.355807</td>\n",
       "      <td>15.952218</td>\n",
       "      <td>115.244002</td>\n",
       "      <td>7.884160</td>\n",
       "      <td>0.331329</td>\n",
       "      <td>11.760232</td>\n",
       "      <td>0.476951</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.078000</td>\n",
       "      <td>21.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>99.000000</td>\n",
       "      <td>62.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>27.300000</td>\n",
       "      <td>0.243750</td>\n",
       "      <td>24.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>3.000000</td>\n",
       "      <td>117.000000</td>\n",
       "      <td>72.000000</td>\n",
       "      <td>23.000000</td>\n",
       "      <td>30.500000</td>\n",
       "      <td>32.000000</td>\n",
       "      <td>0.372500</td>\n",
       "      <td>29.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>6.000000</td>\n",
       "      <td>140.250000</td>\n",
       "      <td>80.000000</td>\n",
       "      <td>32.000000</td>\n",
       "      <td>127.250000</td>\n",
       "      <td>36.600000</td>\n",
       "      <td>0.626250</td>\n",
       "      <td>41.000000</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>17.000000</td>\n",
       "      <td>199.000000</td>\n",
       "      <td>122.000000</td>\n",
       "      <td>99.000000</td>\n",
       "      <td>846.000000</td>\n",
       "      <td>67.100000</td>\n",
       "      <td>2.420000</td>\n",
       "      <td>81.000000</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       Pregnancies     Glucose  BloodPressure  SkinThickness     Insulin  \\\n",
       "count   768.000000  768.000000     768.000000     768.000000  768.000000   \n",
       "mean      3.845052  120.894531      69.105469      20.536458   79.799479   \n",
       "std       3.369578   31.972618      19.355807      15.952218  115.244002   \n",
       "min       0.000000    0.000000       0.000000       0.000000    0.000000   \n",
       "25%       1.000000   99.000000      62.000000       0.000000    0.000000   \n",
       "50%       3.000000  117.000000      72.000000      23.000000   30.500000   \n",
       "75%       6.000000  140.250000      80.000000      32.000000  127.250000   \n",
       "max      17.000000  199.000000     122.000000      99.000000  846.000000   \n",
       "\n",
       "              BMI  DiabetesPedigreeFunction         Age     Outcome  \n",
       "count  768.000000                768.000000  768.000000  768.000000  \n",
       "mean    31.992578                  0.471876   33.240885    0.348958  \n",
       "std      7.884160                  0.331329   11.760232    0.476951  \n",
       "min      0.000000                  0.078000   21.000000    0.000000  \n",
       "25%     27.300000                  0.243750   24.000000    0.000000  \n",
       "50%     32.000000                  0.372500   29.000000    0.000000  \n",
       "75%     36.600000                  0.626250   41.000000    1.000000  \n",
       "max     67.100000                  2.420000   81.000000    1.000000  "
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#查看数值型特征的基本统计量\n",
    "train.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Pregnancies                              0\n",
      "Glucose                                  5\n",
      "BloodPressure                           35\n",
      "SkinThickness                          227\n",
      "Insulin                                374\n",
      "BMI                                     11\n",
      "DiabetesPedigreeFunction                 0\n",
      "Age                                      0\n",
      "Outcome                                  0\n",
      "Triceps_skin_fold_thickness_Missing      0\n",
      "serum_insulin_Missing                    0\n",
      "dtype: int64\n"
     ]
    }
   ],
   "source": [
    "NaN_col_names = ['Glucose','BloodPressure','SkinThickness','Insulin','BMI']\n",
    "train[NaN_col_names] = train[NaN_col_names].replace(0, np.NaN)\n",
    "print(train.isnull().sum())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>SkinThickness</th>\n",
       "      <th>Triceps_skin_fold_thickness_Missing</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>35.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>29.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>23.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>35.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>32.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>45.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   SkinThickness  Triceps_skin_fold_thickness_Missing\n",
       "0           35.0                                    0\n",
       "1           29.0                                    0\n",
       "2            NaN                                    1\n",
       "3           23.0                                    0\n",
       "4           35.0                                    0\n",
       "5            NaN                                    1\n",
       "6           32.0                                    0\n",
       "7            NaN                                    1\n",
       "8           45.0                                    0\n",
       "9            NaN                                    1"
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#缺失值比较多，干脆就开一个新的字段，表明是缺失值还是不是缺失值\n",
    "train['Triceps_skin_fold_thickness_Missing'] = train['SkinThickness'].apply(lambda x:1 if pd.isnull(x) else 0)\n",
    "train[['SkinThickness','Triceps_skin_fold_thickness_Missing']].head(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.axes._subplots.AxesSubplot at 0x250415ab908>"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYgAAAELCAYAAADDZxFQAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAGw5JREFUeJzt3X+UVXW9//Hnix869lVTZPQiA4GKXSWQdFTUSpP6aiwV7ast+JIK6KJc+NVSK6q1Uktd9kuu3tLiLn9hBpE/krxqqaReuyahd+RnJqXJqCk/rDQyZHh//zifgeP4mZkDzJ5zYF6Ptc4653z2Z+/93mdgXvPZe5+9FRGYmZm11avaBZiZWW1yQJiZWZYDwszMshwQZmaW5YAwM7MsB4SZmWU5IMzMLMsBYWZmWQ4IMzPL6lPtArZF//79Y8iQIdUuw8xsu/LUU0+tjoj6zvpt1wExZMgQFi5cWO0yzMy2K5L+VEk/72IyM7MsB4SZmWU5IMzMLGu7PgZhZtaV3n77bZqbm3nrrbeqXUqXqKuro6Ghgb59+27V/A4IM7OkubmZ3XbbjSFDhiCp2uVsk4hgzZo1NDc3M3To0K1ahncxmZklb731Fnvttdd2Hw4Akthrr722aTTkgDAzK7MjhEOrbd0WB4SZmWU5IMzMOtDc3My4ceMYNmwY+++/PxdeeCHr16/vcJ6rrrqqm6orliKi2jVstcbGxtjWb1If9oVZXVTN9u+pb59V7RLMqmr58uUcdNBBm95HBEceeSTnnXcekydPpqWlhalTp9KvXz++/e1vt7ucXXfdlTfffLM7Su5U220CkPRURDR2Nq9HEGZm7Zg/fz51dXVMnjwZgN69ezNjxgxuuukmrr/+es4///xNfU866SQeeeQRpk+fzj/+8Q9GjRrFxIkTAZg1axYjR47kkEMO4cwzzwTgT3/6E2PGjGHkyJGMGTOGF198EYBJkyZx3nnn8dGPfpT99tuPRx99lClTpnDQQQcxadKkTev75S9/yVFHHcWhhx7KGWecUUggFRYQkuokLZD0jKSlki5P7bdIel5SU3qMSu2SdJ2kFZIWSTq0qNrMzCqxdOlSDjvssHe07b777gwePJgNGzZk57n66qvZZZddaGpq4vbbb2fp0qVceeWVzJ8/n2eeeYZrr70WgPPPP5+zzjqLRYsWMXHiRC644IJNy3j99deZP38+M2bM4OSTT+bzn/88S5cuZfHixTQ1NbF69WquuOIKHnroIZ5++mkaGxu55pprunz7i/wexD+B4yPiTUl9gccl3Z+mfSEi7mjT/xPAsPQ4ErghPZuZVUVEZM8Eaq89Z/78+Zx++un0798fgH79+gHwxBNPcNdddwFw5pln8sUvfnHTPCeffDKSGDFiBPvssw8jRowAYPjw4bzwwgs0NzezbNkyjjnmGADWr1/PUUcdtfUb2o7CAiJKBzdaxzx906OjAx7jgFlpvt9I2kPSgIh4pagazcw6Mnz4cO688853tP3tb39j5cqVvPe972Xjxo2b2tv7vkGlYVLeZ+eddwagV69em163vt+wYQO9e/fm4x//OLNnz96i7dlShR6DkNRbUhPwGvBgRDyZJl2ZdiPNkNS69QOBlWWzN6c2M7OqGDNmDOvWrWPWrNLJLC0tLVx88cVMmjSJ/fbbj6amJjZu3MjKlStZsGDBpvn69u3L22+/vWkZc+fOZc2aNQCsXbsWgKOPPpo5c+YAcPvtt/OhD32o4rpGjx7Nr3/9a1asWAHAunXr+P3vf7/tG9xGoQERES0RMQpoAI6Q9AHgy8C/AocD/YAvpe65iH3XiEPSVEkLJS1ctWpVQZWbmZX+qr/77rv56U9/yrBhwzjwwAOpq6vjqquu4phjjmHo0KGMGDGCSy65hEMP3XzYdOrUqYwcOZKJEycyfPhwvvrVr3LsscdyyCGHcNFFFwFw3XXXcfPNNzNy
5Ehuu+22TccmKlFfX88tt9zChAkTGDlyJKNHj+Z3v/td129/d53mKulS4O8R8Z2ytuOASyLiJEk/BB6JiNlp2rPAcR3tYvJprl3Lp7laT5c7JXR7V5OnuUqql7RHer0L8DHgd5IGpDYBpwJL0izzgLPS2Uyjgb/6+IOZWfUUeRbTAOBWSb0pBdHciLhX0nxJ9ZR2KTUBn0397wPGAiuAdcDkAmszM7NOFHkW0yLgg5n249vpH8C0ouoxM7Mt429Sm5lZlgPCzMyyHBBmZpblW46amW2Brj41vtLTyx944AEuvPBCWlpaOPfcc5k+fXqX1pHjEYSZWY1raWlh2rRp3H///SxbtozZs2ezbNmywtfrgDAzq3ELFizggAMOYL/99mOnnXZi/Pjx3HPPPYWv1wFhZlbjXnrpJQYNGrTpfUNDAy+99FLh63VAmJnVuNwlkSq93Pi2cECYmdW4hoYGVq7cfLHr5uZm9t1338LX64AwM6txhx9+OM899xzPP/8869evZ86cOZxyyimFr9enuZqZbYFqXPW4T58+fO973+OEE06gpaWFKVOmMHz48OLXW/gazMxsm40dO5axY8d26zq9i8nMzLIcEGZmluWAMDOzLAeEmZllOSDMzCzLAWFmZlk+zdXMbAu8+PURXbq8wV9b3GmfKVOmcO+997L33nuzZMmSLl1/RzyCMDOrcZMmTeKBBx7o9vUWFhCS6iQtkPSMpKWSLk/tQyU9Kek5ST+RtFNq3zm9X5GmDymqNjOz7clHPvIR+vXr1+3rLXIE8U/g+Ig4BBgFnChpNPBNYEZEDANeB85J/c8BXo+IA4AZqZ+ZmVVJYQERJW+mt33TI4DjgTtS+63Aqen1uPSeNH2MuuN6tmZmllXoMQhJvSU1Aa8BDwJ/AP4SERtSl2ZgYHo9EFgJkKb/FdiryPrMzKx9hQZERLRExCigATgCOCjXLT3nRgvvukuGpKmSFkpauGrVqq4r1szM3qFbTnONiL9IegQYDewhqU8aJTQAL6duzcAgoFlSH+C9wNrMsmYCMwEaGxvffZslM7MCVXJaalebMGECjzzyCKtXr6ahoYHLL7+cc845p/MZt1FhASGpHng7hcMuwMcoHXj+FXA6MAc4G2i98/a89P6JNH1+5O6zZ2bWw8yePbsq6y1yBDEAuFVSb0q7suZGxL2SlgFzJF0B/A9wY+p/I3CbpBWURg7jC6zNzMw6UVhARMQi4IOZ9j9SOh7Rtv0t4Iyi6jEzsy3jb1KbmZXZkfZsb+u2OCDMzJK6ujrWrFmzQ4RERLBmzRrq6uq2ehm+WJ+ZWdLQ0EBzczM7yin0dXV1NDQ0bPX8Dggzs6Rv374MHTq02mXUDO9iMjOzLAeEmZllOSDMzCzLAWFmZlkOCDMzy3JAmJlZlgPCzMyyHBBmZpblgDAzsywHhJmZZTkgzMwsywFhZmZZDggzM8tyQJiZWZYDwszMshwQZmaWVVhASBok6VeSlktaKunC1H6ZpJckNaXH2LJ5vixphaRnJZ1QVG1mZta5Iu8otwG4OCKelrQb8JSkB9O0GRHxnfLOkg4GxgPDgX2BhyQdGBEtBdZoZmbtKGwEERGvRMTT6fUbwHJgYAezjAPmRMQ/I+J5YAVwRFH1mZlZx7rlGISkIcAHgSdT0/mSFkm6SdKeqW0gsLJstmYygSJpqqSFkhbuKDcWNzOrRYUHhKRdgTuBz0XE34AbgP2BUcArwHdbu2Zmj3c1RMyMiMaIaKyvry+oajMzKzQgJPWlFA63R8RdABHxakS0RMRG4D/YvBupGRhUNnsD8HKR9ZmZWfuKPItJwI3A8oi4pqx9QFm304Al6fU8YLyknSUNBYYBC4qqz8zMOlbkWUzHAGcCiyU1pbavABMkjaK0++gF4DMAEbFU0lxgGaUzoKb5DCYzs+opLCAi4nHyxxXu62CeK4Eri6rJzMwq529Sm5lZlgPCzMyyHBBmZpblgDAzsywHhJmZZTkgzMws
ywFhZmZZDggzM8tyQJiZWZYDwszMshwQZmaW5YAwM7MsB4SZmWU5IMzMLMsBYWZmWQ4IMzPLckCYmVlWRQEh6eFK2szMbMfR4S1HJdUB7wH6S9qTzbcQ3R3Yt+DazMysijobQXwGeAr41/Tc+rgH+H5HM0oaJOlXkpZLWirpwtTeT9KDkp5Lz3umdkm6TtIKSYskHbqtG2dmZluvw4CIiGsjYihwSUTsFxFD0+OQiPheJ8veAFwcEQcBo4Fpkg4GpgMPR8Qw4OH0HuATwLD0mArcsPWbZWZm26rDXUytIuLfJR0NDCmfJyJmdTDPK8Ar6fUbkpYDA4FxwHGp263AI8CXUvusiAjgN5L2kDQgLcfMzLpZRQEh6TZgf6AJaEnNAbQbEG3mHwJ8EHgS2Kf1l35EvCJp79RtILCybLbm1OaAMDOrgooCAmgEDk5/3W8RSbsCdwKfi4i/SWq3a6btXeuTNJXSLigGDx68peWYmVmFKv0exBLgX7Z04ZL6UgqH2yPirtT8qqQBafoA4LXU3gwMKpu9AXi57TIjYmZENEZEY319/ZaWZGZmFao0IPoDyyT9QtK81kdHM6g0VLgRWB4R15RNmgecnV6fTemMqNb2s9LZTKOBv/r4g5lZ9VS6i+myrVj2McCZwGJJTantK8DVwFxJ5wAvAmekafcBY4EVwDpg8las08zMukilZzE9uqULjojHyR9XABiT6R/AtC1dj5mZFaPSs5jeYPMB452AvsDfI2L3ogozM7PqqnQEsVv5e0mnAkcUUpGZmdWErbqaa0T8DDi+i2sxM7MaUukupk+Wve1F6XsRW/ydCDMz235UehbTyWWvNwAvULo0hpmZ7aAqPQbhU07NzHqYSm8Y1CDpbkmvSXpV0p2SGoouzszMqqfSXUw3Az9m85faPp3aPl5EUVYdL359RLVLqBmDv7a42iWYVV2lZzHVR8TNEbEhPW4BfCEkM7MdWKUBsVrSpyX1To9PA2uKLMzMzKqr0oCYAnwK+DOl+zOcjq+VZGa2Q6v0GMQ3gLMj4nUo3Vca+A6l4DAzsx1QpSOIka3hABARayndIc7MzHZQlQZEL0l7tr5JI4hKRx9mZrYdqvSX/HeB/5Z0B6VLbHwKuLKwqszMrOoq/Sb1LEkLKV2gT8AnI2JZoZWZmVlVVbybKAWCQ8HMrIfYqst9m5nZjs8BYWZmWQ4IMzPLKiwgJN2Urv66pKztMkkvSWpKj7Fl074saYWkZyWdUFRdZmZWmSJHELcAJ2baZ0TEqPS4D0DSwcB4YHia53pJvQuszczMOlFYQETEY8DaCruPA+ZExD8j4nlgBXBEUbWZmVnnqnEM4nxJi9IuqNZvZw8EVpb1aU5tZmZWJd0dEDcA+wOjKF0V9rupXZm+kVuApKmSFkpauGrVqmKqNDOz7g2IiHg1IloiYiPwH2zejdQMDCrr2gC83M4yZkZEY0Q01tf7nkVmZkXp1oCQNKDs7WlA6xlO84DxknaWNBQYBizoztrMzOydCrsiq6TZwHFAf0nNwKXAcZJGUdp99ALwGYCIWCppLqVLeWwApkVES1G1mZlZ5woLiIiYkGm+sYP+V+IrxJqZ1Qx/k9rMzLJ80x+zGnXYF2ZVu4Sa8dS3z6p2CT2SRxBmZpblgDAzsywHhJmZZTkgzMwsywFhZmZZDggzM8tyQJiZWZYDwszMshwQZmaW5YAwM7MsB4SZmWU5IMzMLMsBYWZmWQ4IMzPLckCYmVmWA8LMzLIcEGZmluWAMDOzrMICQtJNkl6TtKSsrZ+kByU9l573TO2SdJ2kFZIWSTq0qLrMzKwyRY4gbgFObNM2HXg4IoYBD6f3AJ8AhqXHVOCGAusyM7MKFBYQEfEYsLZN8zjg1vT6VuDUsvZZUfIbYA9JA4qqzczMOtfdxyD2iYhXANLz3ql9ILCyrF9zajMzsyqplYPUyrRFtqM0VdJCSQtXrVpVcFlmZj1XdwfEq627jtLza6m9GRhU1q8B
eDm3gIiYGRGNEdFYX19faLFmZj1ZdwfEPODs9Pps4J6y9rPS2Uyjgb+27ooyM7Pq6FPUgiXNBo4D+ktqBi4FrgbmSjoHeBE4I3W/DxgLrADWAZOLqsvMzCpTWEBExIR2Jo3J9A1gWlG1mJnZlquVg9RmZlZjChtBmJl1lRe/PqLaJdSMwV9b3G3r8gjCzMyyHBBmZpblgDAzsywHhJmZZTkgzMwsywFhZmZZDggzM8tyQJiZWZYDwszMshwQZmaW5YAwM7MsB4SZmWU5IMzMLMsBYWZmWQ4IMzPLckCYmVmWA8LMzLKqckc5SS8AbwAtwIaIaJTUD/gJMAR4AfhURLxejfrMzKy6I4iPRsSoiGhM76cDD0fEMODh9N7MzKqklnYxjQNuTa9vBU6tYi1mZj1etQIigF9KekrS1NS2T0S8ApCe965SbWZmRpWOQQDHRMTLkvYGHpT0u0pnTIEyFWDw4MFF1Wdm1uNVZQQRES+n59eAu4EjgFclDQBIz6+1M+/MiGiMiMb6+vruKtnMrMfp9oCQ9L8k7db6GvjfwBJgHnB26nY2cE9312ZmZptVYxfTPsDdklrX/+OIeEDSb4G5ks4BXgTOqEJtZmaWdHtARMQfgUMy7WuAMd1dj5mZ5dXSaa5mZlZDHBBmZpblgDAzsywHhJmZZTkgzMwsywFhZmZZDggzM8tyQJiZWZYDwszMshwQZmaW5YAwM7MsB4SZmWU5IMzMLMsBYWZmWQ4IMzPLckCYmVmWA8LMzLIcEGZmluWAMDOzLAeEmZll1VxASDpR0rOSVkiaXu16zMx6qpoKCEm9ge8DnwAOBiZIOri6VZmZ9Uw1FRDAEcCKiPhjRKwH5gDjqlyTmVmPVGsBMRBYWfa+ObWZmVk361PtAtpQpi3e0UGaCkxNb9+U9GzhVfUQ74P+wOpq11ETLs39U7Rq8b/NMl3zb/N9lXSqtYBoBgaVvW8AXi7vEBEzgZndWVRPIWlhRDRWuw6ztvxvszpqbRfTb4FhkoZK2gkYD8yrck1mZj1STY0gImKDpPOBXwC9gZsiYmmVyzIz65FqKiAAIuI+4L5q19FDeded1Sr/26wCRUTnvczMrMeptWMQZmZWIxwQ5subWM2SdJOk1yQtqXYtPZEDoofz5U2sxt0CnFjtInoqB4T58iZWsyLiMWBttevoqRwQ5submFmWA8I6vbyJmfVMDgjr9PImZtYzOSDMlzcxsywHRA8XERuA1subLAfm+vImViskzQaeAN4vqVnSOdWuqSfxN6nNzCzLIwgzM8tyQJiZWZYDwszMshwQZmaW5YAwM7MsB4SZmWU5IHoYSXtJakqPP0t6qez9Tm36/kLSbtWqtS1Jj0salWnfqjolHSzpGUn/I2lIO336SPpLO9N+JOnUDpZ/kaS6CpYzTdLEDpbzMUk/62hbuoOkcyWFpGPL2s5Ibaem9zdLev8WLvc0SV/o6npt29XcLUetWBGxBhgFIOky4M2I+E55H0mi9B2ZE7q/wi23DXV+ErgjIr7RlfWUuQi4CXiro04R8f2C1l+ExcAE4NH0fjzwTOvEiJi8pQuMiLu7pjTrah5BGACSDpC0RNIPgKeBAembq3uk6ZMlLUp/cd+c2vaRdJekhZIWSBqd2q+QdKukX0l6TtKU1D4wjQKa0rqObqeWPpJuk7Q49bugzfTe6a/3y9L7Zkl7lG3DjZKWSrq/9S/4zDpOofQN8s9Keii1fTHNv0TS/8vM00vS9ZKWSfo50L+Dz/PzwN7Af7UuP7VfnT7DJyTtXfZ5fS69PlDS/NTn6bYjG0lHtran+W6U9KikP0qaVtbv7PQzaUo192rvc5X0+bRNz0j6UXvblDwCHJ2WtTswGNh0M5/WUd6WrCuNTP4tvf6RpGsl/XfaptNSe29JP0g/159LekAdjN6sa3gEYeUOBiZHxGcBSgMJkHQI8CXg6IhYK6lf6n8d8K2I+E36RXYv8IE0bQRwNLA78LSk/wQ+Dfw8
Ir6p0o2KdmmnjsOA/hExIq1/j7JpfYAfA09HxDcz874fmBARiyXdBZxK6R4X7xAR8yQdAayOiH9LrydSuj9Gb2CBpEeBZWWznQ4MTdu4b5r2g9wGRMQMSRcDH46Iv0jqA7wXeDQipku6BpgCXN1m1tnAZRHx8xRuvYAD0ufwYWAGcEpENKefz4HAGGAPYLlKAX8QcBqln9cGSTMp/aX/h3Y+1y8C74uI9W0+65yNlELiY8A+wM/S+tpq72dYybr2Bo6h9G9oLnA3cAaly9CPAP6F0mVhsp+9dR2PIKzcHyLit5n244GfRMRagNZnSr8kfiCpidIvij0ltf7S/1lEvBURrwGPAYdTujDguZIuBT4QEW+2U8cKStfeuVbSCcBfy6bdSPvhAKWbHy1Or58ChnSyza0+DNwZEesi4o20PR9q0+cjwOyI2BgRzZR+UW6Jf0TE/e3VJmlPSr9Ufw6QPr91afIHgOuBk9K6W90bEevT57wWqKf0czkcWJh+NscC+9P+57oU+JFKx0HermA75lAKnPFkwjfZlnX9LEoWsfneJB+idJ2wjRHxMpt3cVmBHBBW7u/ttIv8PSIEHBERo9JjYET8I01r2z8iYj5wHPAKcLvaOTCbjpOMBB4HLgB+WDb518AYSTu3U+s/y163UPkoOXdfjGx5FfbLWV/2ur3a2lv+y2n+tgfpc9sr4Kayn8v7I+IbHXyuJ1D6a/wISqHSu5PteAI4FNg9Iv6Q67CN6yrfJrV5tm7kgLBKPASMb921VLaL6SGgfL93+S+vUyXtLKk/pb/OF0p6H/DniJhJ6V7DH8ytTFI9pYPkPwUupfTLqNXMtN45abdNV3kMOE3SLpJ2pXTb1f/K9Bmf9ucPpPSXeUfeACo+uyoiXgdWSzoZQFKdpPekyWuBk4BvpV1NHXkI+FT67FvPXBuc+1zTL+iGFN5foDQCeU97C051BvBl4Cvt9emqdZV5HDhdJQMojeasYD4GYZ2KiEWSvgU8JmkDpd0j51AKhxskTab0b+lXbA6M3wL3U7oZ0aUR8apKB6svkvQ28CalYxI5g4AbVdrJHpSOf5TX8y1JVwK3SDqri7ZxgUqXlm7dxXZDOo5R/n/kDuCjlA7KPkspMDoyE3hI0krgxApLmQj8MG3feuD/lNX4ikoH1+/raLtT3ZendfeitCvns5RGGG0/1z7Aj1U6TbgX8M20i61DEfGfnXTJ/Qyz62o91tWJuZR2dbZ+9k/yzl2PVgBf7tu6nKQrSAd/q12L7Tgk7RoRb6bRyZPAkRGxqtp17cg8gjCz7cX96dTavpRGpQ6HgnkEYVUlaSHv/kPl/0bEslz/rVzHD4DRbZqviYhZXbT8eZS+D1Dukoh4KNe/1kk6l9J3RMo9FhEX5PrbjssBYWZmWT6LyczMshwQZmaW5YAwM7MsB4SZmWU5IMzMLOv/Az2gHzCuuLrlAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "#color = sns.color_palette()\n",
    "\n",
    "%matplotlib inline\n",
    "sns.countplot(x=\"Triceps_skin_fold_thickness_Missing\", hue=\"Outcome\",data=train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.axes._subplots.AxesSubplot at 0x25041580748>"
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYgAAAELCAYAAADDZxFQAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAFgJJREFUeJzt3X+QXWWd5/H3lxBoRxAJCQzQYBIMsyTmh9AgiOOPyTpiSgi64JCKQAhOZlhYEWGdjG4pOsKwhcLCOLiFihAHiXGBgWEpBMmApYvGhI35AQhZQdLAQAgUyARM0vnuH/d0uISnk9tJ37436fer6ta957nPc8733Orqzz0/b2QmkiRtabdWFyBJak8GhCSpyICQJBUZEJKkIgNCklRkQEiSigwISVKRASFJKjIgJElFu7e6gB0xcuTIHD16dKvLkKSdypIlS57PzFHb6rdTB8To0aNZvHhxq8uQpJ1KRPyukX7uYpIkFRkQkqQiA0KSVLRTH4OQpIG0YcMGuru7ee2111pdyoDo6Oigs7OT4cOHb9d4A0KSKt3d3ey9996MHj2aiGh1OTskM1m7di3d3d2MGTNmu+bhLiZJqrz22mvst99+O304AEQE++233w5tDRkQklRnVwiHXju6LgaEJKnIgJCkreju7mb69OmMGzeOww47jPPPP5/169dvdcyll146SNU1V2Rmq2vYbl1dXbmjV1If9V/nDVA1O78ll5/R6hKklnr44Yc54ogjNk9nJu95z3s455xzOOuss+jp6WHOnDmMGDGCyy+/vM/57LXXXrzyyiuDUfI2bblOABGxJDO7tjXWLQhJ6sPChQvp6OjgrLPOAmDYsGFceeWVXHfddVxzzTWcd955m/t+7GMf47777mPu3Lm8+uqrTJkyhZkzZwIwb948Jk2axOTJkzn99NMB+N3vfsfUqVOZNGkSU6dO5cknnwRg1qxZnHPOOXzoQx9i7Nix3H///cyePZsjjjiCWbNmbV7e3XffzXHHHceRRx7Jqaee2pRAMiAkqQ8rV67kqKOOekPb2972Ng499FA2btxYHHPZZZfxlre8haVLl3LjjTeycuVKLrnkEhYuXMivf/1rrrrqKgDOO+88zjjjDJYtW8bMmTP5zGc+s3keL774IgsXLuTKK6/kxBNP5IILLmDlypUsX76cpUuX8vzzz/O1r32Nn/zkJzz44IN0dXVxxRVXDPj6ex2EJPUhM4tnAvXVXrJw4UJOOeUURo4cCcCIESMAeOCBB7jlllsAOP300/n85z+/ecyJJ55IRDBx4kQOOOAAJk6cCMCECRN44okn6O7u5qGHHuL4448HYP369Rx33HHbv6J9MCAkqQ8TJkzg5ptvfkPbyy+/zOrVq9lnn33YtGnT5va+rjdoNEzq++y5554A7Lbbbptf905v3LiRYcOG8eEPf5ibbrqpX+vTX+5ikqQ+TJ06lXXr1jFvXu1klp6eHi688EJmzZrF2LFjWbp0KZs2bWL16tUsWrRo87jhw4ezYcOGzfNYsGABa9euBeCFF14A4L3vfS/z588H4MYbb+R973tfw3Ude+yx/PznP2fVqlUArFu3jkcffXTHV3gLBoQk9SEiuPXWW/nRj37EuHHjOPzww+no6ODSSy/l+OOPZ8yYMUycOJGLLrqII488cvO4OXPmMGnSJGbOnMmECRP44he/yAc+8AEmT57M5z73OQCuvvpqvve97zFp0iS+//3vbz420YhRo0Zx/fXXM2PGDCZNmsSxxx7LI488MvDr36zTXCPiEGAe8MfAJuDazLwqIi4G/hJYU3X9QmbeWY35W+BsoAf4TGb+eGvL8DTXgeVprhrqSqeE7ux25DTXZh6D2AhcmJkPRsTewJKIuKd678rM/Hp954gYD5wGTAAOAn4SEYdnZk8Ta5Qk9aFpu5gy85nMfLB6/XvgYeDgrQyZDszPzD9k5uPAKuCY
ZtUnSdq6QTkGERGjgXcDv6yazouIZRFxXUTsW7UdDKyuG9bN1gNFktRETQ+IiNgLuBn4bGa+DHwLOAyYAjwDfKO3a2H4mw6QRMSciFgcEYvXrFlTGCJJGghNDYiIGE4tHG7MzFsAMvPZzOzJzE3At3l9N1I3cEjd8E7g6S3nmZnXZmZXZnaNGjWqmeVL0pDWtICI2lUf3wUezswr6toPrOv2cWBF9fp24LSI2DMixgDjgEVIklqimWcxHQ+cDiyPiKVV2xeAGRExhdruoyeAvwLIzJURsQB4iNoZUOd6BpOkdjPQp8Y3enr5XXfdxfnnn09PTw+f/vSnmTt37oDWUdK0gMjMn1E+rnDnVsZcAlzSrJokaWfU09PDueeeyz333ENnZydHH300J510EuPHj2/qcr2SWpLa3KJFi3jnO9/J2LFj2WOPPTjttNO47bbbmr5cA0KS2txTTz3FIYe8fg5PZ2cnTz31VNOXa0BIUpsr3RKp0duN7wgDQpLaXGdnJ6tXv34dcXd3NwcddFDTl2tASFKbO/roo3nsscd4/PHHWb9+PfPnz+ekk05q+nL9wSBJ6odW3PV4991355vf/CYf+chH6OnpYfbs2UyYMKH5y236EiRtF29F/zpvRQ/Tpk1j2rRpg7pMdzFJkooMCElSkQEhSSoyICRJRQaEJKnIgJAkFXmaqyT1w5NfnTig8zv0S8u32Wf27Nnccccd7L///qxYsWKb/QeKWxCS1OZmzZrFXXfdNejLNSAkqc29//3vZ8SIEYO+XANCklRkQEiSigwISVKRASFJKvI0V0nqh0ZOSx1oM2bM4L777uP555+ns7OTr3zlK5x99tlNX64BIUlt7qabbmrJct3FJEkqMiAkSUUGhCTVycxWlzBgdnRdDAhJqnR0dLB27dpdIiQyk7Vr19LR0bHd8/AgtSRVOjs76e7uZs2aNa0uZUB0dHTQ2dm53eMNCEmqDB8+nDFjxrS6jLbhLiZJUpEBIUkqalpARMQhEfGvEfFwRKyMiPOr9hERcU9EPFY971u1R0RcHRGrImJZRBzZrNokSdvWzC2IjcCFmXkEcCxwbkSMB+YC92bmOODeahrgo8C46jEH+FYTa5MkbUPTAiIzn8nMB6vXvwceBg4GpgM3VN1uAE6uXk8H5mXNL4C3R8SBzapPkrR1g3IMIiJGA+8GfgkckJnPQC1EgP2rbgcDq+uGdVdtkqQWaHpARMRewM3AZzPz5a11LbS96WqViJgTEYsjYvGucq6yJLWjpgZERAynFg43ZuYtVfOzvbuOqufnqvZu4JC64Z3A01vOMzOvzcyuzOwaNWpU84qXpCGumWcxBfBd4OHMvKLurduBM6vXZwK31bWfUZ3NdCzwUu+uKEnS4GvmldTHA6cDyyNiadX2BeAyYEFEnA08CZxavXcnMA1YBawDzmpibZKkbWhaQGTmzygfVwCYWuifwLnNqkeS1D9eSS1JKjIgJElFBoQkqciAkCQVGRCSpCIDQpJUZEBIkooMCElSkQEhSSoyICRJRQaEJKnIgJAkFRkQkqQiA0KSVGRASJKKDAhJUpEBIUkqMiAkSUUGhCSpyICQJBUZEJKkIgNCklRkQEiSigwISVKRASFJKjIgJElFBoQkqciAkCQVGRCSpCIDQpJU1LSAiIjrIuK5iFhR13ZxRDwVEUurx7S69/42IlZFxG8i4iPNqkuS1JiGAiIi7m2kbQvXAycU2q/MzCnV485qXuOB04AJ1ZhrImJYI7VJkppjqwERER0RMQIYGRH7RsSI6jEaOGhrYzPzp8ALDdYxHZifmX/IzMeBVcAxDY6VJDXBtrYg/gpYAvyH6rn3cRvwj9u5zPMiYlm1C2rfqu1gYHVdn+6qTZLUIlsNiMy8KjPHABdl5tjMHFM9JmfmN7djed8CDgOmAM8A36jao7T40gwiYk5ELI6IxWvWrNmOEiRJjdi9kU6Z+Q8R8V5gdP2YzJzXn4Vl5rO9ryPi28Ad
1WQ3cEhd107g6T7mcS1wLUBXV1cxRCRJO66hgIiI71P75r8U6KmaE+hXQETEgZn5TDX5caD3DKfbgR9ExBXUjm2MAxb1Z96SpIHVUEAAXcD4zGz4G3tE3AR8kNoB7m7gy8AHI2IKtXB5gtoxDjJzZUQsAB4CNgLnZmZPab6SpMHRaECsAP6Y2nGDhmTmjELzd7fS/xLgkkbnL0lqrkYDYiTwUEQsAv7Q25iZJzWlKklSyzUaEBc3swhJUvtp9Cym+5tdiCSpvTR6FtPvef26hD2A4cC/Z+bbmlWYJKm1Gt2C2Lt+OiJOxlthSNIurdFjEG+Qmf8cEXMHuhhJKnnyqxNbXULbOPRLywdtWY3uYvpE3eRu1K6L8CpmSdqFNboFcWLd643ULnKbPuDVSJLaRqPHIM5qdiGSpPbS6A8GdUbErdUvxD0bETdHRGezi5MktU6jPzn6PWo31DuI2u80/EvVJknaRTV6DGJUZtYHwvUR8dlmFKTW8UyR1w3mmSJSu2p0C+L5iPhURAyrHp8C1jazMElSazUaELOBTwL/Ru2OrqcAHriWpF1Yo7uY/g44MzNfBIiIEcDXqQWHJGkX1OgWxKTecADIzBeAdzenJElSO2g0IHaLiH17J6otiO26TYckaefQ6D/5bwD/JyL+F7VbbHwSf/1NknZpjV5JPS8iFgN/BgTwicx8qKmVSZJaquHdRFUgGAqSNEQ0egxCkjTEGBCSpCIDQpJUZEBIkooMCElSkQEhSSoyICRJRQaEJKnIgJAkFRkQkqQiA0KSVNS0gIiI6yLiuYhYUdc2IiLuiYjHqud9q/aIiKsjYlVELIuII5tVlySpMc3cgrgeOGGLtrnAvZk5Dri3mgb4KDCueswBvtXEuiRJDWhaQGTmT4EXtmieDtxQvb4BOLmufV7W/AJ4e0Qc2KzaJEnbNtjHIA7IzGcAquf9q/aDgdV1/bqrtjeJiDkRsTgiFq9Zs6apxUrSUNYuB6mj0Jaljpl5bWZ2ZWbXqFGjmlyWJA1dgx0Qz/buOqqen6vau4FD6vp1Ak8Pcm2SpDqDHRC3A2dWr88EbqtrP6M6m+lY4KXeXVGSpNZo+CdH+ysibgI+CIyMiG7gy8BlwIKIOBt4Eji16n4nMA1YBawDzmpWXZKkxjQtIDJzRh9vTS30TeDcZtUiSeq/djlILUlqMwaEJKnIgJAkFRkQkqQiA0KSVGRASJKKDAhJUpEBIUkqMiAkSUUGhCSpyICQJBUZEJKkIgNCklRkQEiSigwISVKRASFJKjIgJElFBoQkqciAkCQVGRCSpCIDQpJUZEBIkooMCElSkQEhSSoyICRJRQaEJKnIgJAkFRkQkqQiA0KSVGRASJKKdm/FQiPiCeD3QA+wMTO7ImIE8ENgNPAE8MnMfLEV9UmSWrsF8aHMnJKZXdX0XODezBwH3FtNS5JapJ12MU0Hbqhe3wCc3MJaJGnIa1VAJHB3RCyJiDlV2wGZ+QxA9bx/i2qTJNGiYxDA8Zn5dETsD9wTEY80OrAKlDkAhx56aLPqk6QhryVbEJn5dPX8HHArcAzwbEQcCFA9P9fH2Gszsyszu0aNGjVYJUvSkDPoARERb42IvXtfA38OrABuB86sup0J3DbYtUmSXteKXUwHALdGRO/yf5CZd0XEr4AFEXE28CRwagtqkyRVBj0gMvO3wORC+1pg6mDXI0kqa6fTXCVJbcSAkCQVGRCSpCIDQpJUZEBIkooMCElSkQEhSSoyICRJRQaEJKnIgJAkFRkQkqQiA0KSVGRASJKKDAhJUpEBIUkqMiAkSUUGhCSpyICQJBUZEJKkIgNCklRkQEiSigwISVKRASFJKjIgJElFBoQkqciAkCQVGRCSpCIDQpJUZEBIkooMCElSUdsFREScEBG/iYhVETG31fVI0lDVVgEREcOAfwQ+CowHZkTE+NZWJUlDU1sFBHAMsCozf5uZ64H5wPQW1yRJQ1K7BcTBwOq66e6qTZI0yHZv
dQFbiEJbvqFDxBxgTjX5SkT8pulVDRHvgJHA862uoy18ufSnqFbxb7POwPxtvqORTu0WEN3AIXXTncDT9R0y81rg2sEsaqiIiMWZ2dXqOqQt+bfZGu22i+lXwLiIGBMRewCnAbe3uCZJGpLaagsiMzdGxHnAj4FhwHWZubLFZUnSkNRWAQGQmXcCd7a6jiHKXXdqV/5ttkBk5rZ7SZKGnHY7BiFJahMGhLy9idpWRFwXEc9FxIpW1zIUGRBDnLc3UZu7Hjih1UUMVQaEvL2J2lZm/hR4odV1DFUGhLy9iaQiA0LbvL2JpKHJgNA2b28iaWgyIOTtTSQVGRBDXGZuBHpvb/IwsMDbm6hdRMRNwAPAn0REd0Sc3eqahhKvpJYkFbkFIUkqMiAkSUUGhCSpyICQJBUZEJKkIgNCklRkQEiViPjriDhjgOd5fUScUr3+zvbcKTciLo6IjIh31rVdULV1VdN3RsTb+znfAV9f7Vra7idHpW2JiN2rC/wGVGb+z4Ge5xbz//QODF9O7Sr3r1XTpwAP1c172nbU09T11c7PLQi1TES8NSL+d0T8OiJWRMRfRMRREXF/RCyJiB9HxIFV3/si4tKIuB84v/6befX+K9XzB6vxCyLi0Yi4LCJmRsSiiFgeEYdtpZ6LI+KiuuX992rcoxHxp1X7hKptaUQsi4hxETG6/gdtIuKiiLi4MP/76r7xvxIRl1Tr/ouIOGAbH9c/U92GPSLGAi8Ba+rm/UREjCx9ptX7l0XEQ1XNX+/H+v5R9Vkui4gfRsQve9dBuz4DQq10AvB0Zk7OzHcBdwH/AJySmUcB1wGX1PV/e2Z+IDO/sY35TgbOByYCpwOHZ+YxwHeA/9KP+navxn0W+HLV9tfAVZk5BeiidrPD7fFW4BeZORn4KfCX2+j/MrA6It4FzAB+2Ee/N32mETEC+DgwITMn8fpWyJZK6/ufgRercX8HHNXY6mlXYEColZYD/7H65vqn1O4q+y7gnohYCvw3aneX7dXXP8Ut/Sozn8nMPwD/D7i7bnmj+1HfLdXzkrpxDwBfiIi/Ad6Rma/2Y3711gN3FOa/NfOp7WY6Gbi1jz5v+Ewz8yVq4fIa8J2I+ASwro+xpfV9X7VcMnMFsKyBOrWLMCDUMpn5KLVvpMuBvwf+E7AyM6dUj4mZ+ed1Q/697vVGqr/fiAhgj7r3/lD3elPd9Cb6d9ytd1xP77jM/AFwEvAq8OOI+LP6WiodDcx7Q75+I7TN89+Gf6G2RfRkZr5c6rDlZxoRX6qO1xwD3EwtXO7qY/5vWl/KvxeiIcKAUMtExEHAusz8J+DrwHuAURFxXPX+8IiY0MfwJ3h9d8d0YHiTy6WqaSzw28y8mtpt0ScBzwL7R8R+EbEn8LFmLLvaWvkb3rjbbcv6tvxMj4yIvYB9MvNOaruPpvRjsT8DPlnNezy13XYaIjyLSa00Ebg8IjYBG4BzqH0bvzoi9qH29/k/gNLtx78N3BYRi4B7eePWRTP9BfCpiNgA/Bvw1czcEBFfBX4JPA480qyFZ+b8bXQpfaZ7U/usOqhtEVzQj0VeA9wQEcuA/0ttF9NL/S5cOyVv9y2pTxExDBiema9VZ4DdS+2g//oWl6ZB4BaEpK35I+BfI2I4ta2PcwyHocMtCA05EfFF4NQtmn+UmX3u2x8M7VqXhi4DQpJU5FlMkqQiA0KSVGRASJKKDAhJUpEBIUkq+v+KrBp4u/7N3AAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Insulin has many missing values, so add a 0/1 flag column recording whether\n",
    "# each row's Insulin is missing, then count the flag values split by Outcome.\n",
    "train['serum_insulin_Missing'] = train['Insulin'].isnull().astype('int64')\n",
    "sns.countplot(data=train, x=\"serum_insulin_Missing\", hue=\"Outcome\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Remove the *_Missing indicator columns from the frame.\n",
    "# errors='ignore' skips labels that are already absent, and rebinding the\n",
    "# result (instead of inplace=True) keeps this cell safe to re-run: the\n",
    "# previous form raised KeyError once the columns had already been dropped.\n",
    "train = train.drop(\n",
    "    columns=['Triceps_skin_fold_thickness_Missing', 'serum_insulin_Missing'],\n",
    "    errors='ignore',\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Pregnancies                 0\n",
      "Glucose                     0\n",
      "BloodPressure               0\n",
      "SkinThickness               0\n",
      "Insulin                     0\n",
      "BMI                         0\n",
      "DiabetesPedigreeFunction    0\n",
      "Age                         0\n",
      "Outcome                     0\n",
      "dtype: int64\n"
     ]
    }
   ],
   "source": [
    "# Replace every remaining NaN with the median of its own column, then print\n",
    "# the per-column count of nulls that are left.\n",
    "medians = train.median()\n",
    "train = train.fillna(value=medians)\n",
    "\n",
    "print(train.isna().sum())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Save the processed frame as a CSV file, without writing the row index.\n",
    "train.to_csv('FE_pima-indians-diabetes.csv', index=False)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
